Imports and Data Loading¶

In [65]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import statsmodels.api as sm
from scipy.stats import shapiro
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.stattools import adfuller
from scipy.stats import shapiro, normaltest, probplot
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.stats.stattools import durbin_watson
from sklearn.linear_model import RidgeCV, LassoCV, ElasticNetCV
from sklearn.ensemble import StackingRegressor
import statsmodels.api as sm
import seaborn as sns
In [66]:
# Path to the dataset
file_path = 'Dataset3_PortfolioReplicaStrategy.xlsx'

# Reading the Excel file with specific structure:
# - Row 4 contains full names
# - Row 6 contains Bloomberg tickers (note: Excel is 1-indexed, pandas is 0-indexed)
# - Data starts from row 7

data_raw = pd.read_excel(file_path)


# Set the date column as the index and ensure it's in datetime format
data_raw['Date'] = pd.to_datetime(data_raw['Ticker'], format='%d/%m/%Y')
data = data_raw.set_index('Date')

# Display the first few rows to examine the dataset structure
print("Dataset preview:")
display(data.head())

# Show the variable information (Ticker -> Full Name)
print("\nVariable Information (Bloomberg Ticker -> Full Name):")

# Show basic information about the dataset
print("\nDataset information:")
display(data.info())

# Display statistical summary of the dataset
print("\nBasic statistics:")
display(data.describe())
Dataset preview:
Ticker MXWO MXWD LEGATRUU HFRXGL RX1 TY1 GC1 CO1 ES1 VG1 NQ1 LLL1 TP1 DU1 TU2
Date
2007-10-23 2007-10-23 1633.44 414.14 350.2138 1343.63 113.70 110.515625 759.0 82.85 1525.50 4393.0 2212.00 1287.0 1570.5 103.385 103.718750
2007-10-30 2007-10-30 1663.89 423.26 352.5413 1356.53 113.79 110.656250 787.8 87.44 1536.00 4476.0 2217.75 1323.9 1610.5 103.410 103.812500
2007-11-06 2007-11-06 1651.59 419.51 354.1758 1360.20 113.79 110.875000 823.4 93.26 1525.00 4425.0 2233.50 1320.0 1575.5 103.410 104.046875
2007-11-13 2007-11-13 1601.81 405.98 357.2217 1347.16 114.35 111.718750 799.0 88.83 1483.25 4323.0 2066.75 1271.0 1440.5 103.595 104.304688
2007-11-20 2007-11-20 1570.74 398.54 359.4446 1335.21 114.72 113.156250 791.4 95.49 1446.00 4296.0 2035.50 1225.2 1472.5 103.800 104.945312
Variable Information (Bloomberg Ticker -> Full Name):

Dataset information:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 705 entries, 2007-10-23 to 2021-04-20
Data columns (total 16 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Ticker    705 non-null    datetime64[ns]
 1   MXWO      705 non-null    float64       
 2   MXWD      705 non-null    float64       
 3   LEGATRUU  705 non-null    float64       
 4   HFRXGL    705 non-null    float64       
 5   RX1       705 non-null    float64       
 6   TY1       705 non-null    float64       
 7   GC1       705 non-null    float64       
 8   CO1       705 non-null    float64       
 9   ES1       705 non-null    float64       
 10  VG1       705 non-null    float64       
 11  NQ1       705 non-null    float64       
 12  LLL1      705 non-null    float64       
 13  TP1       705 non-null    float64       
 14  DU1       705 non-null    float64       
 15  TU2       705 non-null    float64       
dtypes: datetime64[ns](1), float64(15)
memory usage: 93.6 KB
None
Basic statistics:
Ticker MXWO MXWD LEGATRUU HFRXGL RX1 TY1 GC1 CO1 ES1 VG1 NQ1 LLL1 TP1 DU1 TU2
count 705 705.00000 705.000000 705.000000 705.000000 705.000000 705.000000 705.000000 705.000000 705.000000 705.000000 705.000000 705.000000 705.000000 705.000000 705.000000
mean 2014-07-22 00:00:00 1644.12566 404.151915 452.203206 1210.092071 147.278837 125.966977 1317.902837 77.018496 1960.443050 3072.167631 4526.981206 1064.444113 1277.263121 110.221277 108.627682
min 2007-10-23 00:00:00 705.35000 176.100000 342.247100 1021.400000 110.200000 110.515625 732.700000 19.330000 689.500000 1873.000000 1072.000000 465.100000 704.000000 102.190000 103.718750
25% 2011-03-08 00:00:00 1293.43000 328.790000 431.737400 1158.150000 128.900000 121.203125 1178.000000 54.270000 1314.500000 2753.000000 2244.000000 1103.600000 905.000000 109.230000 107.871094
50% 2014-07-22 00:00:00 1646.69000 402.570000 456.728800 1215.200000 148.000000 126.000000 1283.000000 72.050000 1909.250000 3071.000000 3884.750000 1103.600000 1302.500000 110.855000 109.093750
75% 2017-12-05 00:00:00 1978.83000 478.410000 478.186800 1254.250000 163.240000 130.343750 1499.700000 105.780000 2569.750000 3433.000000 6333.250000 1103.600000 1587.500000 111.990000 109.906250
max 2021-04-20 00:00:00 2944.05000 702.990000 559.311700 1415.110000 179.320000 140.328125 2001.200000 140.670000 4132.800000 4476.000000 13975.750000 1323.900000 1978.000000 112.720000 110.527344
std NaN 443.39041 100.980126 47.338067 73.965200 19.318527 6.295242 274.477979 26.794278 770.070634 473.812729 2900.866833 128.954655 353.598276 2.374875 1.532632
In [ ]:
found_index=['MXWO','MXWD','LEGATRUU','HFRXGL']
futures=data.drop(columns=found_index)
found_index=['Ticker','MXWO','MXWD','LEGATRUU','HFRXGL']
hedge_fd=data.loc[:,found_index]
dates=hedge_fd.iloc[:,0]

#Getting the notmalized prices of the hedge founds
#prices_hf=hedge_fd.iloc[:,1:]#/hedge_fd.iloc[0,1:] #normalized prices
initial_price=hedge_fd.iloc[0,1:]

#Calculate the returns
returns = (hedge_fd.iloc[1:, 1:].values/ hedge_fd.iloc[:-1, 1:].values) - 1
returns = pd.DataFrame(returns, columns=hedge_fd.columns[1:])

Monster Index Composition¶

In [68]:
#Fund composition
w_MXWO=0.25
w_LEGATRUU=0.25
w_HFRXGL=0.5

#Computing normalized prices and prices of our Monster Index
y_price=w_MXWO*hedge_fd.iloc[:,1]+w_LEGATRUU*hedge_fd.iloc[:,2]+w_HFRXGL*hedge_fd.iloc[:,3] #weighted sum of components in the fund (indices are 1, 2, 3 for w_MXWO, w_LEGATRUU, and w_HFRXGL)
y_values = pd.DataFrame(y_price, columns=['Target']) 
y_price=y_values/y_values.iloc[0] #normalizing price fluctuations

#Computing returns of our monster index, returns_i = (price_i/price_{i-1}) 
returns_MI = (y_price.iloc[1:].values/ y_price.iloc[:-1].values) - 1
returns_MI = pd.DataFrame(returns_MI, columns=['Target'])
returns_MI['Target'] = pd.to_numeric(returns_MI['Target'] , errors='coerce')

#Futures Prices & Returns
returns_futures = (futures.iloc[1:, 1:].values/ futures.iloc[:-1, 1:].values) - 1
returns_futures = pd.DataFrame(returns_futures, columns=futures.columns[1:])
price_futures=futures.iloc[:,1:]
price_futures_std=futures.iloc[:,1:]/futures.iloc[0,1:] #normalized prices of futures
In [69]:
#Plot
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 4))

#Plot Prices
ax1.plot( price_futures_std,)
ax1.plot(y_price, label=('Monster index'), linewidth=4,c='b')
ax1.set_xlabel('Time')
ax1.set_ylabel('Normalized prices')
ax1.set_title('Normalized prices of futures')
ax1.legend()

#Plot Returns
ax2.plot(dates[1:] , returns_futures,)
ax2.legend()
ax2.set_xlabel('Time')
ax2.set_ylabel('Returns')
ax2.set_title('Returns of the Futures')

plt.show()
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\4269529687.py:14: UserWarning: No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
  ax2.legend()
No description has been provided for this image

Replicating the Fund Returns¶

Problem Statement:¶

The problem that has to be solved is finding the mixture that produces the return of the "monster" fund. This problem can be understood as a vector of returns posisble components ($\mathbf{X}$) and a vector with the composition of the index ($\beta_{Monster}$) that yield the target returns ($\mathbf{y}$): $\mathbf{X}^T\beta_{Monster} = \mathbf{y}$.

Since $\beta_{Monster}$ is unknown, the replication problem comes from the ability to approximate as accurately as possible those parameters. In this section we will apply different methods that can help us estimate $\beta_{Monster}$.

In [70]:
# Data preparation
#data:= y_1 | x_1_1 | x_2_1 ... | x_11_1 
#       y_2 | x_1_2 | x_2_2 ... | x_11_2
#       ...    ...    ...         ...
#       y_704 | x_1_704 | x_2_704 ... | x_11_704

data = pd.concat([returns_MI, returns_futures], axis=1)

Ridge Regression¶

Here we do an estimation of fund composition using ridge regression, which is a type of linear regression that introduces a L2 norm regularization term as to avoid overfitting with large parameters: $ \lambda \| \boldsymbol{\beta} \|_2^2$, with $\lambda$ being a hyperparameter that adjusts the penalization for large parameters. The optimization problem that this regression presents to find the aprroximated $\beta_{monster}$, $\beta$ is the following:

\begin{equation} \min_{\boldsymbol{\beta}} \left\{ \| \mathbf{y} - \mathbf{X} \boldsymbol{\beta} \|_2^2 + \lambda \| \boldsymbol{\beta} \|_2^2 \right\} \end{equation}

Our methodology here consists on fitting the regression to 150 data points (150 weeks, 1 point per week) to then used the found weights to compute the returns for one week.

In [71]:
# Initialize lists to store predictions and actual values
ridge_y_pred = []
ridge_y_test = []
ridge_coefficients = []

# Moving window regression
window_size = 150
for i in range(window_size, len(data)):
    # Train Ridge regression with cross-validation
    # intercept is false, we only want to find the optimal mixture
    ridge_cv_model = RidgeCV(cv=5, fit_intercept=False)
    ridge_cv_model.fit(data.iloc[i-window_size:i].drop(columns='Target'), data.iloc[i-window_size:i]['Target'])

    # Predict the next value
    ridge_y_pred.append(ridge_cv_model.predict(data.iloc[i:i+1].drop(columns='Target'))[0])
    ridge_y_test.append(data.iloc[i]['Target'])

    # Store coefficients
    ridge_coefficients.append(ridge_cv_model.coef_)

# Ensure dates align with the predictions
aligned_dates = dates[window_size+1:]  # Adjust dates to match the length of predictions

Plot of the results for the Ridge regression

In [72]:
# Plot results
plt.figure(figsize=(14, 8), dpi=300)
plt.plot(aligned_dates, ridge_y_test, color='blue', label='Actual Target')
plt.plot(aligned_dates, ridge_y_pred, 'g--', color='red', label='Ridge Prediction')
plt.xlabel('Date')
plt.ylabel('Returns')
plt.title('Ridge Regression: Prediction vs Actual')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\749008089.py:4: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, ridge_y_pred, 'g--', color='red', label='Ridge Prediction')
No description has been provided for this image

Lasso Regression¶

In this approach we try a different regression approach that also introduces a regularization term, but on the absolut value of the parameters (L1 norm), which has the objective on erasing parameters that are not significant enough, with a hyperparameter $\lambda$ that also that adjusts the effects of the regularization. The optimization problem that this regression presents to find the aprroximated $\beta_{monster}$, $\beta$ is the following:

\begin{equation} \min_{\boldsymbol{\beta}} \left\{ \| \mathbf{y} - \mathbf{X} \boldsymbol{\beta} \|_2^2 + \lambda \| \boldsymbol{\beta} \|_1 \right\} \end{equation}

Our methodology consists too on fitting the regression to 150 data points (150 weeks, 1 point per week) to then used the found weights to compute the returns for one week. -->

In [73]:
# Initialize lists to store predictions and actual values
lasso_y_pred = []
lasso_y_test = []
lasso_coefficients = []

# Moving window regression
window_size = 150
for i in range(window_size, len(data)):
    # Train Lasso regression with cross-validation
    # intercept is false, we only want to find the optimal mixture
    # max iteration set to a high enough value for convergence, no analytical solution exists for lasso regression, L1 norm is not differentiable, iterative methods are used
    lasso_cv_model = LassoCV(cv=5, max_iter=10000, fit_intercept=False)
    lasso_cv_model.fit(data.iloc[i-window_size:i].drop(columns='Target'), data.iloc[i-window_size:i]['Target'])

    # Predict the next value
    lasso_y_pred.append(lasso_cv_model.predict(data.iloc[i:i+1].drop(columns='Target'))[0])
    lasso_y_test.append(data.iloc[i]['Target'])

    # Store coefficients
    lasso_coefficients.append(lasso_cv_model.coef_)

# Ensure dates align with the predictions
aligned_dates = dates[window_size+1:]  # Adjust dates to match the length of predictions

Plot of the results for the Lasso regression

In [74]:
# Plot results
plt.figure(figsize=(14, 8), dpi=300)
plt.plot(aligned_dates, lasso_y_test, color='blue', label='Actual Target')
plt.plot(aligned_dates, lasso_y_pred, 'g--', color='red', label='Lasso Prediction')
plt.xlabel('Date')
plt.ylabel('Returns')
plt.title('Lasso Regression: Prediction vs Actual')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\411379268.py:4: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, lasso_y_pred, 'g--', color='red', label='Lasso Prediction')
No description has been provided for this image

Elastic Net Regression¶

Finally we implement the Elastic Net regression model, which combines the Lasso and Ridge regressions regularization terms to mix both. Elastic Net has two hyperparameters, $\lambda_1$ for the L1 norm and $\lambda_2$ for the L2 norm. The optimization problem for the Elastic Net is the following:

\begin{equation} \min_{\boldsymbol{\beta}} \left\{ \| \mathbf{y} - \mathbf{X} \boldsymbol{\beta} \|_2^2 + \lambda_1 \| \boldsymbol{\beta} \|_1 + \lambda_2 \| \boldsymbol{\beta} \|_2^2 \right\} \end{equation}

The same methodology of fiting to 150 points and predict one has been applied:

In [75]:
# Initialize lists to store predictions and actual values
elasticnet_y_pred = []
elasticnet_y_test = []
elasticnet_coefficients = []


# Moving window regression
window_size = 150
for i in range(window_size, len(data)):
    # Train Elastic Net regression with cross-validation
    elasticnet_cv_model = ElasticNetCV(cv=5, fit_intercept=False)
    elasticnet_cv_model.fit(data.iloc[i-window_size:i].drop(columns='Target'), data.iloc[i-window_size:i]['Target'])

    # Predict the next value (1 week)
    elasticnet_y_pred.append(elasticnet_cv_model.predict(data.iloc[i:i+1].drop(columns='Target'))[0])
    elasticnet_y_test.append(data.iloc[i]['Target'])

    # Store coefficients
    elasticnet_coefficients.append(elasticnet_cv_model.coef_)

# Ensure dates align with the predictions
aligned_dates = dates[window_size+1:]  # Adjust dates to match the length of predictions

Plot of the results of Elastic Net regression:

In [76]:
# Plot results
plt.figure(figsize=(14, 8), dpi=300)
plt.plot(aligned_dates, elasticnet_y_test, color='blue', label='Actual Target')
plt.plot(aligned_dates, elasticnet_y_pred, 'g--', color='red', label='Elastic Net Prediction')
plt.xlabel('Date')
plt.ylabel('Returns')
plt.title('Elastic Net Regression: Prediction vs Actual')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\1476864712.py:4: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, elasticnet_y_pred, 'g--', color='red', label='Elastic Net Prediction')
No description has been provided for this image

Ensemble Regression¶

In this approach, we move do not rely on a single regression model and instead we construct a weighted combination of multiple models: Ridge, Lasso, and Elastic Net.

Rather than solving a single optimization problem directly, we run three separate regularized regressions and combine their results using error-based weights that vary at each step of the rolling window. The idea is that if one model is performing better than any separate model by taking into account their error values

At eacth step of our prediction we define the ensemble method as:

$$ \hat{y}^{\text{ensemble}}_t = w^{\text{ridge}}_t \cdot \hat{y}^{\text{ridge}}_t + w^{\text{lasso}}_t \cdot \hat{y}^{\text{lasso}}_t + w^{\text{elastic}}_t \cdot \hat{y}^{\text{elastic}}_t $$

Where the weights ($w$) are computed as:

$$ w^{(m)}_t = \frac{1 / \text{error}^{(m)}_t}{1 / \text{error}^{(ridge)}_t + 1 / \text{error}^{(elastic)}_t + 1 / \text{error}^{(Lasso)}_t} $$

Were $\text{error}^{(m)}_t$ is the absolute error made by model $m$ at the most recent training point. Giving weights to the models that are inversely proportional to the errors.

The coefficients are also ensembled as follows:

$$ \boldsymbol{\beta}^{\text{ensemble}}_t = w^{\text{ridge}}_t \cdot \boldsymbol{\beta}^{\text{ridge}}_t + w^{\text{lasso}}_t \cdot \boldsymbol{\beta}^{\text{lasso}}_t + w^{\text{elastic}}_t \cdot \boldsymbol{\beta}^{\text{elastic}}_t $$

Our methodology, as with the base regressions, consists in fitting each model on a rolling window of 150 data points (150 weeks), using the fitted weights to compute the predicted return for the next week.

In [77]:
initial_price = w_MXWO * hedge_fd.iloc[0, 1] + w_LEGATRUU * hedge_fd.iloc[0, 2] + w_HFRXGL * hedge_fd.iloc[0, 3]

# Initialize lists to store predictions, actual values, coefficients, and VaR for each model
ensemble_y_pred = []
ensemble_y_test = []
ensemble_coefficients = []

# Moving window regression
window_size = 150

point=0 # keep count to access previously computed predictions and coefficients

for i in range(window_size, len(data)-1): # move the window

    X_train = data.iloc[i-window_size:i].drop(columns='Target') #get reurns data in the window
    y_train = data.iloc[i-window_size:i]['Target'] #get index returns in the window

    X_test = data.iloc[i:i+1].drop(columns='Target') #get next data point (week to predict)
    ensemble_y_test.append(data.iloc[i]['Target'])

    # Calculate errors for weighting
    ridge_error = np.abs(ridge_y_pred[point] - y_train.iloc[-1])
    lasso_error = np.abs(lasso_y_pred[point] - y_train.iloc[-1])
    elasticnet_error = np.abs(elasticnet_y_pred[point] - y_train.iloc[-1])

    # Calculate weights based on inverse errors using our formula
    total_error = ridge_error + lasso_error + elasticnet_error
    ridge_weight = (1 / ridge_error) / (1 / ridge_error + 1 / lasso_error + 1 / elasticnet_error)
    lasso_weight = (1 / lasso_error) / (1 / ridge_error + 1 / lasso_error + 1 / elasticnet_error)
    elasticnet_weight = (1 / elasticnet_error) / (1 / ridge_error + 1 / lasso_error + 1 / elasticnet_error)

    # Calculate the ensemble prediction, we already have the info for the next point from previous sections, so we will retrieve it and compute the result
    ensemble_pred = (ridge_weight * ridge_y_pred[point + 1] +
                     lasso_weight * lasso_y_pred[point + 1] +
                     elasticnet_weight * elasticnet_y_pred[point + 1])
    ensemble_y_pred.append(ensemble_pred)

    # Calculate the ensemble coefficients, weighted sum
    ensemble_coef = (ridge_weight * ridge_coefficients[point] +
                     lasso_weight * lasso_coefficients[point] +
                     elasticnet_weight * elasticnet_coefficients[point])
    ensemble_coefficients.append(ensemble_coef)

    point += 1


ensemble_y_test.append(data.iloc[-1]['Target'])
ensemble_y_pred = [None] + ensemble_y_pred # to fill in for the mismatch between the other predictions and the ensemble prediction size (ensemble has 1 less)
ensemble_coefficients = [None] + ensemble_coefficients

Plot of the results of Ensemble regression:

In [78]:
# Plot results
plt.figure(figsize=(14, 8), dpi=300)
plt.plot(aligned_dates, ensemble_y_test, color='blue', label='Actual Target')
plt.plot(aligned_dates, ensemble_y_pred, 'g--', color='red', label='Ensemble Prediction')
plt.xlabel('Date')
plt.ylabel('Returns')
plt.title('Ensemble Regression: Prediction vs Actual')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\8087280.py:4: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, ensemble_y_pred, 'g--', color='red', label='Ensemble Prediction')
No description has been provided for this image

Value At Risk¶

Analysis of the value at risk (99th percentile) of the different replication methods

In [79]:
# declaration of lists at which VaR will be appended
ensemble_var = []
ridge_var = []
lasso_var = []
elasticnet_var = []

point=0 #keep track of what values to retrieve from the model values
for i in range(window_size, len(data)): # move the window
    #getting the points for the window
    X_train = data.iloc[i-window_size:i].drop(columns='Target')

    # Calculate portfolio returns using the current coefficients for the whole window
    ridge_returns = X_train.dot(ridge_coefficients[point])
    lasso_returns = X_train.dot(lasso_coefficients[point])
    elasticnet_returns = X_train.dot(elasticnet_coefficients[point])

    # Calculate portfolio returns using the ensemble coefficients
    portfolio_returns = (ridge_weight * ridge_returns +
                         lasso_weight * lasso_returns +
                         elasticnet_weight * elasticnet_returns)
    
    # Calculate VaR for the portfolio returns for each model, 99th percentile
    ridge_var.append(np.quantile(ridge_returns, 0.99))
    lasso_var.append(np.quantile(lasso_returns, 0.99))
    elasticnet_var.append(np.quantile(elasticnet_returns, 0.99))

    # Calculate VaR for the ensemble portfolio returns
    ensemble_var.append(np.quantile(portfolio_returns, 0.99))

    point +=1

Plot of VaR over time:

In [80]:
# Plot VaR over time
plt.figure(figsize=(12, 5), dpi=300)
plt.plot(aligned_dates, ridge_var, color='red', label='Ridge VaR')
plt.plot(aligned_dates, lasso_var, color='green', label='Lasso VaR')
plt.plot(aligned_dates, elasticnet_var, color='purple', label='Elastic Net VaR')
plt.plot(aligned_dates, ensemble_var, color='orange', label='Ensemble VaR')
plt.xlabel('Date')
plt.ylabel('VaR')
plt.title('Value at Risk (VaR) over Time')
plt.legend()
plt.tight_layout()
plt.xticks(rotation=45)
plt.show()
No description has been provided for this image

Performance comparison for different regressions¶

In [94]:
# Convert predictions and actual values to numpy arrays for easier manipulation
ridge_y_pred_array = np.array(ridge_y_pred)
ridge_y_test_array = np.array(ridge_y_test)
lasso_y_pred_array = np.array(lasso_y_pred)
lasso_y_test_array = np.array(lasso_y_test)
elasticnet_y_pred_array = np.array(elasticnet_y_pred)
elasticnet_y_test_array = np.array(elasticnet_y_test)
ensemble_y_test_array = np.array(ensemble_y_test)
ensemble_y_pred_array = np.array(ensemble_y_pred)


# Performance metrics:
#   MSE = (\sum_{i=1}^{i=N}(y_pred_i - y_i)^2)/N Mean of Squared Errors of the returns of approximated the fund vs the actual fund
#   MAE = (\sum_{i=1}^{i=N}|y_pred_i - y_i|)/N Mean of Absolute Errors of the returns of approximated the fund vs the actual fund
#   Tracking Error = Y_pred - Y, vector of the differences between the predicted ones and the actual ones
#   Tracking Error Volatility = Var(Tracking Error)*sqrt(time increment in years), the volatility of the tracking error, wach measure is one week (52 weeks in a year)
#   Information ratio = (Mean of the Tracking Error)/(Tracking Error Volatility), a risk-adjusted metric of the tracking performance

# Compute performance metrics for Ridge Regression
MSE_Ridge = mean_squared_error(ridge_y_test_array, ridge_y_pred_array)
MAE_Ridge = mean_absolute_error(ridge_y_test_array, ridge_y_pred_array)
tracking_error_Ridge = ridge_y_test_array - ridge_y_pred_array
tracking_error_volatility_Ridge = np.std(tracking_error_Ridge) * np.sqrt(52)
information_ratio_Ridge = np.mean(tracking_error_Ridge) / tracking_error_volatility_Ridge

# Compute performance metrics for Lasso Regression
MSE_Lasso = mean_squared_error(lasso_y_test_array, lasso_y_pred_array)
MAE_Lasso = mean_absolute_error(lasso_y_test_array, lasso_y_pred_array)
tracking_error_Lasso = lasso_y_test_array - lasso_y_pred_array
tracking_error_volatility_Lasso = np.std(tracking_error_Lasso) * np.sqrt(52)
information_ratio_Lasso = np.mean(tracking_error_Lasso) / tracking_error_volatility_Lasso

# Compute performance metrics for Elastic Net Regression
MSE_ElasticNet = mean_squared_error(elasticnet_y_test_array, elasticnet_y_pred_array)
MAE_ElasticNet = mean_absolute_error(elasticnet_y_test_array, elasticnet_y_pred_array)
tracking_error_ElasticNet = elasticnet_y_test_array - elasticnet_y_pred_array
tracking_error_volatility_ElasticNet = np.std(tracking_error_ElasticNet) * np.sqrt(52)
information_ratio_ElasticNet = np.mean(tracking_error_ElasticNet) / tracking_error_volatility_ElasticNet

# Turnover calculation, to estimate transaction costs. 
# We define turnover as a half of the re-adjustment of weights, as to avoid double counting buy and sell operations
# Assumed standard transaction costs of 2, to 4 bps

transactionCosts = [0.0002, 0.0004]
mean_transaction_cost = np.mean(transactionCosts)


# Assuming weights and turnover calculations are similar for all models
turnover_Ridge = np.sum(np.abs(np.diff(np.array(ridge_coefficients), axis=0)), axis=1) / 2
mean_annual_turnover_Ridge = np.mean(turnover_Ridge) * 52
mean_annual_transaction_cost_Ridge = mean_annual_turnover_Ridge * mean_transaction_cost
mean_gross_exposure_Ridge = np.mean(np.sum(np.abs(np.array(ridge_coefficients)), axis=1))

turnover_Lasso = np.sum(np.abs(np.diff(np.array(lasso_coefficients), axis=0)), axis=1) / 2
mean_annual_turnover_Lasso = np.mean(turnover_Lasso) * 52
mean_annual_transaction_cost_Lasso = mean_annual_turnover_Lasso * mean_transaction_cost
mean_gross_exposure_Lasso = np.mean(np.sum(np.abs(np.array(lasso_coefficients)), axis=1))

turnover_ElasticNet = np.sum(np.abs(np.diff(np.array(elasticnet_coefficients), axis=0)), axis=1) / 2
mean_annual_turnover_ElasticNet = np.mean(turnover_ElasticNet) * 52
mean_annual_transaction_cost_ElasticNet = mean_annual_turnover_ElasticNet * mean_transaction_cost
mean_gross_exposure_ElasticNet = np.mean(np.sum(np.abs(np.array(elasticnet_coefficients)), axis=1))

# Compute performance metrics for Ensemble Regression, we take the arrays from the second position, the first position contains None due to the ensemble needing a previous value of error from the other regressions
MSE_Ensemble = mean_squared_error(ensemble_y_test_array[1:], ensemble_y_pred_array[1:])
MAE_Ensemble = mean_absolute_error(ensemble_y_test_array[1:], ensemble_y_pred_array[1:])
tracking_error_Ensemble = ensemble_y_test_array[1:] - ensemble_y_pred_array[1:]
tracking_error_volatility_Ensemble = np.std(tracking_error_Ensemble) * np.sqrt(52)
information_ratio_Ensemble = np.mean(tracking_error_Ensemble) / tracking_error_volatility_Ensemble

# Calculate exact turnover for ensemble model
ensemble_coefficients_array = np.array(ensemble_coefficients[1:])
turnover_Ensemble = np.sum(np.abs(np.diff(ensemble_coefficients_array, axis=0)), axis=1) / 2
mean_annual_turnover_Ensemble = np.mean(turnover_Ensemble) * 52

mean_annual_transaction_cost_Ensemble = mean_annual_turnover_Ensemble * mean_transaction_cost
mean_gross_exposure_Ensemble = np.mean(np.sum(np.abs(ensemble_coefficients_array), axis=1))

# Create DataFrame for metrics
metrics_data = {
    'Metric': ['MSE', 'MAE', 'Tracking Error Volatility', 'Information Ratio', 'Mean Annual Turnover', 'Mean Annual Transaction Cost', 'Gross Exposure', 'VaR %'],
    'Ridge Regression': [MSE_Ridge, MAE_Ridge, tracking_error_volatility_Ridge, information_ratio_Ridge, mean_annual_turnover_Ridge, mean_annual_transaction_cost_Ridge, mean_gross_exposure_Ridge, f"{mean_var_ridge * 100:.2f}%"],
    'Lasso Regression': [MSE_Lasso, MAE_Lasso, tracking_error_volatility_Lasso, information_ratio_Lasso, mean_annual_turnover_Lasso, mean_annual_transaction_cost_Lasso, mean_gross_exposure_Lasso, f"{mean_var_lasso * 100:.2f}%"],
    'Elastic Net Regression': [MSE_ElasticNet, MAE_ElasticNet, tracking_error_volatility_ElasticNet, information_ratio_ElasticNet, mean_annual_turnover_ElasticNet, mean_annual_transaction_cost_ElasticNet, mean_gross_exposure_ElasticNet, f"{mean_var_elasticnet * 100:.2f}%"],
    'Ensemble Regression': [MSE_Ensemble, MAE_Ensemble, tracking_error_volatility_Ensemble, information_ratio_Ensemble, mean_annual_turnover_Ensemble, mean_annual_transaction_cost_Ensemble, mean_gross_exposure_Ensemble, f"{mean_var_ensemble * 100:.2f}%"]
}

Display the resutls

In [95]:
# Construct the DataFrame
metrics_df = pd.DataFrame(metrics_data)
# Print the table
display(metrics_df)
Metric Ridge Regression Lasso Regression Elastic Net Regression Ensemble Regression
0 MSE 0.000046 0.000019 0.000019 0.000024
1 MAE 0.004811 0.003207 0.003227 0.003534
2 Tracking Error Volatility 0.048582 0.031208 0.031454 0.035427
3 Information Ratio 0.007019 -0.003356 -0.003534 0.001727
4 Mean Annual Turnover 0.164505 3.147135 2.023006 5.185754
5 Mean Annual Transaction Cost 0.000049 0.000944 0.000607 0.001556
6 Gross Exposure 0.522823 1.356464 1.225063 1.004919
7 VaR % 2.41% 3.25% 3.25% 2.99%

Among the "simple" regressions, Lasso has the lowest MSE and MAE, and combined with the smalles tracking error volatility shows that is the method that can better track the fund.

However, Lasso and elastic net have a higher mean transaction cost and Gross exposure, making them more expensive to implement.

The enseble regressoion is a compromise between all models and it shows in the resulting metrics, with a slight decrease in error terms from the Lasso regression, but an improvement over the exposure and VaR, it is a nice compromise, however the transaction costs are more significant than in any other model.

Visualizing price evolution¶

In [96]:
# Initial price calculation based on weights and hedge fund data
initial_price = w_MXWO * hedge_fd.iloc[0, 1] + w_LEGATRUU * hedge_fd.iloc[0, 2] + w_HFRXGL * hedge_fd.iloc[0, 3]

# Convert returns to prices
ridge_prices = [initial_price * (1 + r) for r in np.cumsum(ridge_y_pred)]
lasso_prices = [initial_price * (1 + r) for r in np.cumsum(lasso_y_pred)]
elasticnet_prices = [initial_price * (1 + r) for r in np.cumsum(elasticnet_y_pred)]
ensemble_prices = [initial_price * (1 + r) for r in np.cumsum(ensemble_y_pred[1:])]
ensemble_prices.insert(0,None) # dealing with shape diference
actual_prices = [initial_price * (1 + r) for r in np.cumsum(ensemble_y_test)]

# Ensure dates align with the predictions
aligned_dates = dates[window_size+1:]  # Adjust dates to match the length of predictions

# Plot results for Ridge, Lasso, Elastic Net, and Ensemble Predictions
plt.figure(figsize=(14, 6), dpi=350)
plt.plot(aligned_dates, ridge_prices, color='red', label='Ridge Replicated Price')
plt.plot(aligned_dates, elasticnet_prices, color='orange', label='Elastic Net Replicated Price')
plt.plot(aligned_dates, ensemble_prices, color='blue', label='Ensemble Replicated Price',linewidth=2)
plt.plot(aligned_dates, actual_prices, color='purple', label='Actual Target Price')
plt.plot(aligned_dates, lasso_prices, 'g--', color='green', label='Lasso Replicated Price')

plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Replicated Prices vs Actual Prices')
plt.legend()
plt.xticks(rotation=45)

plt.show()
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\3669581631.py:21: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, lasso_prices, 'g--', color='green', label='Lasso Replicated Price')
No description has been provided for this image

Introducing Transaction Costs¶

With transaction costs playing an important role in the returns, it is important to take them into consideration. We have chosen a 3 bps transaction cost that will be applied to any re-adjustment.

To do so we use the function eta_rescale, that will return the new weights, where they will only be updated if the difference between that weight and the new one is bigger than a hyperparameter $\eta$

In [258]:
def eta_weight(old_weights, new_weights, eta=0.075):
    total_change = np.sum(np.abs(new_weights - old_weights))
    if total_change > eta:
        return new_weights  # weights changed
    else:
        return old_weights  # weights unchanged
    
def transaction_cost_on_return(toadjust, tcost=0.03):
    return np.sum(toadjust*tcost) #cost in terms of returns (3 bps at standard)


def returns_with_costs_etaT(retorns, weights, eta=0.0075, transactionCost=0.003):
    final_weights = [weights[0]]
    netreturns = [weights[0].dot(retorns[0]).item()]  # # Ensure a scalar, gave problems due to small numbers having diferent formats


    for t in range(1, len(weights)):
        newW = eta_weight(final_weights[t-1], weights[t], eta)
        difference = np.abs(newW - final_weights[t-1])
        netreturn = newW.dot(retorns[t]).item() - transaction_cost_on_return(difference, transactionCost)
        netreturns.append(netreturn)
        final_weights.append(newW)
                
    return final_weights, netreturns

Always adjusting with transaction costs (eta=0)

In [259]:
returns = data.iloc[window_size:].drop(columns='Target').values

_, ridge_returns_adj = returns_with_costs_etaT(returns, ridge_coefficients, eta=0.0)
ridge_prices_withT = [initial_price * (1 + r) for r in np.cumsum(ridge_returns_adj)]

_, lasso_returns_adj = returns_with_costs_etaT(returns, lasso_coefficients, eta=0.0)
lasso_prices_withT = [initial_price * (1 + r) for r in np.cumsum(lasso_returns_adj)]

_, elasticnet_returns_adj = returns_with_costs_etaT(returns, elasticnet_coefficients, eta=0.0)
elasticnet_prices_withT = [initial_price * (1 + r) for r in np.cumsum(elasticnet_returns_adj)]

_, ensemble_returns_adj = returns_with_costs_etaT(returns[1:], ensemble_coefficients[1:], eta=0.0)
ensemble_prices_withT = [initial_price * (1 + r) for r in np.cumsum(ensemble_returns_adj)]

Visualizing transaction costs on returns

In [268]:
plt.figure(figsize=(10, 6), dpi=300)
plt.plot(aligned_dates, elasticnet_y_test, color='blue', label='Actual Target')
plt.plot(aligned_dates, ridge_returns_adj, color='green', label='Ridge Transaction Prediction')
plt.plot(aligned_dates, ridge_y_pred, 'g--', color='red', label='Ridge Prediction')
plt.xlabel('Date')
plt.ylabel('Returns')
plt.title('Ridge Regression: Prediction vs Actual')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\2512586626.py:4: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, ridge_y_pred, 'g--', color='red', label='Ridge Prediction')
No description has been provided for this image
In [269]:
plt.figure(figsize=(10, 6), dpi=300)
plt.plot(aligned_dates, elasticnet_y_test, color='blue', label='Actual Target')
plt.plot(aligned_dates, lasso_returns_adj, color='green', label='Lasso Transaction Prediction')
plt.plot(aligned_dates, lasso_y_pred, 'g--', color='red', label='Lasso Prediction')
plt.xlabel('Date')
plt.ylabel('Returns')
plt.title('Lasso Regression: Prediction vs Actual')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\732774158.py:4: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, lasso_y_pred, 'g--', color='red', label='Lasso Prediction')
No description has been provided for this image
In [272]:
# Plot results
plt.figure(figsize=(10, 6), dpi=300)
plt.plot(aligned_dates, elasticnet_y_test, color='blue', label='Actual Target')
plt.plot(aligned_dates, elasticnet_returns_adj, color='green', label='Elastic Net transaction Prediction')
plt.plot(aligned_dates, elasticnet_y_pred, 'g--', color='red', label='Elastic Net Prediction')
plt.xlabel('Date')
plt.ylabel('Returns')
plt.title('Elastic Net Regression: Prediction vs Actual')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\3673557002.py:5: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, elasticnet_y_pred, 'g--', color='red', label='Elastic Net Prediction')
No description has been provided for this image
In [271]:
plt.figure(figsize=(10, 6), dpi=300)
plt.plot(aligned_dates, elasticnet_y_test, color='blue', label='Actual Target')
plt.plot(aligned_dates[1:], ensemble_returns_adj, color='green', label='Ensemble Transaction Prediction')
plt.plot(aligned_dates, ensemble_y_pred, 'g--', color='red', label='Ensemble Prediction')
plt.xlabel('Date')
plt.ylabel('Returns')
plt.title('Ensemble Regression: Prediction vs Actual')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\548374672.py:4: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, ensemble_y_pred, 'g--', color='red', label='Ensemble Prediction')
No description has been provided for this image

Visualizing the effect on price of always adjusting, which

In [277]:
# Convert transaction-adjusted returns to prices
ridge_prices_adjusted = [initial_price * (1 + r) for r in np.cumsum(ridge_returns_adj)]
lasso_prices_adjusted = [initial_price * (1 + r) for r in np.cumsum(lasso_returns_adj)]
elasticnet_prices_adjusted = [initial_price * (1 + r) for r in np.cumsum(elasticnet_returns_adj)]
ensemble_prices_adjusted = [initial_price * (1 + r) for r in np.cumsum(ensemble_returns_adj[1:])]
ensemble_prices_adjusted.insert(0, None)  # align shape with other series
actual_prices = [initial_price * (1 + r) for r in np.cumsum(ensemble_y_test)]  # same as before

# Ensure dates align with the predictions
aligned_dates = dates[window_size+1:]

# Plot results for Ridge, Lasso, Elastic Net, and Ensemble (adjusted)
plt.figure(figsize=(10, 6), dpi=300)
plt.plot(aligned_dates, ridge_prices_adjusted, color='red', label='Ridge Adjusted Price')
plt.plot(aligned_dates, ridge_prices, 'g--', color='red', label='Ridge Price (no transaction cost)')
plt.plot(aligned_dates, actual_prices, color='purple', label='Actual Target Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Adjusted Replicated Prices vs Actual Prices (with Transaction Costs)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

plt.figure(figsize=(10, 6), dpi=300)
plt.plot(aligned_dates, lasso_prices_adjusted, color='green', label='Lasso Adjusted Price')
plt.plot(aligned_dates, lasso_prices, 'g--', color='green', label='Lasso Price (no transaction cost)')
plt.plot(aligned_dates, actual_prices, color='purple', label='Actual Target Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Adjusted Replicated Prices vs Actual Prices (with Transaction Costs)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Plot results for Ridge, Lasso, Elastic Net, and Ensemble (adjusted)
plt.figure(figsize=(10, 6), dpi=300)
plt.plot(aligned_dates, elasticnet_prices_adjusted, color='orange', label='Elastic Net Adjusted Price')
plt.plot(aligned_dates, elasticnet_prices, 'g--', color='orange', label='Elastic Net (no transaction cost)')
plt.plot(aligned_dates, actual_prices, color='purple', label='Actual Target Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Adjusted Replicated Prices vs Actual Prices (with Transaction Costs)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Plot results for Ridge, Lasso, Elastic Net, and Ensemble (adjusted)
plt.figure(figsize=(10, 6), dpi=300)
plt.plot(aligned_dates[1:], ensemble_prices_adjusted, color='blue', label='Ensemble Adjusted Price')
plt.plot(aligned_dates, ensemble_prices, 'g--', color='blue', label='Ensemble (no transaction cost)')
plt.plot(aligned_dates, actual_prices, color='purple', label='Actual Target Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Adjusted Replicated Prices vs Actual Prices (with Transaction Costs)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\408743010.py:15: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, ridge_prices, 'g--', color='red', label='Ridge Price (no transaction cost)')
No description has been provided for this image
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\408743010.py:27: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, lasso_prices, 'g--', color='green', label='Lasso Price (no transaction cost)')
No description has been provided for this image
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\408743010.py:40: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, elasticnet_prices, 'g--', color='orange', label='Elastic Net (no transaction cost)')
No description has been provided for this image
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\408743010.py:53: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, ensemble_prices, 'g--', color='blue', label='Ensemble (no transaction cost)')
No description has been provided for this image

Threshold of 0.075 (7.5% change in composition) for re-adjusment

In [284]:
returns = data.iloc[window_size:].drop(columns='Target').values
threshold = 0.075

_, ridge_returns_adj4 = returns_with_costs_etaT(returns, ridge_coefficients, eta=threshold)
ridge_prices_withT4 = [initial_price * (1 + r) for r in np.cumsum(ridge_returns_adj4)]

_, lasso_returns_adj4 = returns_with_costs_etaT(returns, lasso_coefficients, eta=threshold)
lasso_prices_withT4 = [initial_price * (1 + r) for r in np.cumsum(lasso_returns_adj4)]

_, elasticnet_returns_adj4 = returns_with_costs_etaT(returns, elasticnet_coefficients, eta=threshold)
elasticnet_prices_withT4 = [initial_price * (1 + r) for r in np.cumsum(elasticnet_returns_adj4)]

_, ensemble_returns_adj4 = returns_with_costs_etaT(returns[1:], ensemble_coefficients[1:], eta=threshold)
ensemble_prices_withT4 = [initial_price * (1 + r) for r in np.cumsum(ensemble_returns_adj4)]
In [288]:
# Convert transaction-adjusted returns to prices (with eta = 0.04)
ridge_prices_adjusted4 = [initial_price * (1 + r) for r in np.cumsum(ridge_returns_adj4)]
lasso_prices_adjusted4 = [initial_price * (1 + r) for r in np.cumsum(lasso_returns_adj4)]
elasticnet_prices_adjusted4 = [initial_price * (1 + r) for r in np.cumsum(elasticnet_returns_adj4)]
ensemble_prices_adjusted4 = [initial_price * (1 + r) for r in np.cumsum(ensemble_returns_adj4)]
ensemble_prices_adjusted4.insert(0, None)  # align shape with other series

# Target stays the same
actual_prices = [initial_price * (1 + r) for r in np.cumsum(ensemble_y_test)]

# Ensure dates align with the predictions
aligned_dates = dates[window_size+1:]

# Ridge plot
plt.figure(figsize=(10, 6), dpi=300)
plt.plot(aligned_dates, ridge_prices_adjusted4, color='red', label='Ridge Adjusted Price (η=7.5%)')
plt.plot(aligned_dates, ridge_prices_adjusted, linestyle=':', color='red', label='Ridge Adjusted Price (η=0%)')
plt.plot(aligned_dates, ridge_prices, 'g--', color='red', label='Ridge Price (no transaction cost)')
plt.plot(aligned_dates, actual_prices, color='purple', label='Actual Target Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Ridge: Adjusted vs Actual Prices (with Transaction Costs η=7.5%)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Lasso plot
plt.figure(figsize=(10, 6), dpi=300)
plt.plot(aligned_dates, lasso_prices_adjusted4, color='green', label='Lasso Adjusted Price (η=7.5%)')
plt.plot(aligned_dates, lasso_prices_adjusted, linestyle=':', color='green', label='Lasso Adjusted Price (η=0%)')
plt.plot(aligned_dates, lasso_prices, 'g--', color='green', label='Lasso Price (no transaction cost)')
plt.plot(aligned_dates, actual_prices, color='purple', label='Actual Target Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Lasso: Adjusted vs Actual Prices (with Transaction Costs η=7.5%)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Elastic Net plot
plt.figure(figsize=(10, 6), dpi=300)
plt.plot(aligned_dates, elasticnet_prices_adjusted4, color='orange', label='Elastic Net Adjusted Price (η=7.5%)')
plt.plot(aligned_dates, elasticnet_prices_adjusted, linestyle=':', color='orange', label='Elastic Net Adjusted Price (η=0%)')
plt.plot(aligned_dates, elasticnet_prices, 'g--', color='orange', label='Elastic Net Price (no transaction cost)')
plt.plot(aligned_dates, actual_prices, color='purple', label='Actual Target Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Elastic Net: Adjusted vs Actual Prices (with Transaction Costs η=7.5%)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Ensemble plot
plt.figure(figsize=(10, 6), dpi=300)
plt.plot(aligned_dates, ensemble_prices_adjusted4, color='blue', label='Ensemble Adjusted Price (η=7.5%)')
plt.plot(aligned_dates[1:], ensemble_prices_adjusted, linestyle=':', color='blue', label='Ensemble Adjusted Price (η=0%)')
plt.plot(aligned_dates, ensemble_prices, 'g--', color='blue', label='Ensemble Price (no transaction cost)')
plt.plot(aligned_dates, actual_prices, color='purple', label='Actual Target Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Ensemble: Adjusted vs Actual Prices (with Transaction Costs η=7.5%)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\9076711.py:18: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, ridge_prices, 'g--', color='red', label='Ridge Price (no transaction cost)')
No description has been provided for this image
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\9076711.py:32: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, lasso_prices, 'g--', color='green', label='Lasso Price (no transaction cost)')
No description has been provided for this image
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\9076711.py:46: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, elasticnet_prices, 'g--', color='orange', label='Elastic Net Price (no transaction cost)')
No description has been provided for this image
C:\Users\ericb\AppData\Local\Temp\ipykernel_27552\9076711.py:60: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "g--" (-> color='g'). The keyword argument will take precedence.
  plt.plot(aligned_dates, ensemble_prices, 'g--', color='blue', label='Ensemble Price (no transaction cost)')
No description has been provided for this image